library(plyr)
library(ggplot2)
library(rstudioapi)
# Calcular la mitja Circular del vector X amb els noms dels angles
circular.mean <- function(X) {
x <- mean(cos(as.numeric(names(X))) * X)
y <- mean(sin(as.numeric(names(X))) * X)
n <- sum(X)
theta.mean <- atan2(y, x)
if(theta.mean < 0)
theta.mean = theta.mean + 2 * pi
theta.var = sqrt(x^2 + y^2) / n
return(c(theta.mean, theta.var))
}
# Calcular la mitja donat el vector de dades (X) la quantitat per cada classe (Y) i el nou ordre (I)
resample.means <- function(X, Y, I) {
n <- length(Y)
Y.means <- numeric(n)
X.index = 0
for(i in 1:n) {
Y.means[i] <- mean(X[I[(X.index + 1):(X.index + Y[i])]])
X.index = X.index + Y[i]
}
names(Y.means) <- names(Y)
return(Y.means)
}
# Agafar directori del fitxer
path = dirname(rstudioapi::getSourceEditorContext()$path)
ds.og.file = paste(path, "/Accidents_Trafic_Catalunya.csv", sep = '')
ds.file = paste(path, "/ATC.csv", sep = '')
update = F
if(update || !file.exists(ds.file)) {
dataset <- read.csv(ds.og.file)
# Corregir l'hora
for(i in 1:nrow(dataset)) {
di <- dataset[i,]
if(di$grupHor != 'Nit') {
if(di$hor < 600) {
if(di$hor < 60)
di$hor = di$hor * 100
else
di$hor = di$hor * 10
}
} else {
if(di$hor < 60 || (220 <= di$hor && di$hor < 240)) {
if(di$hor < 6 || (21 < di$hor && di$hor < 24))
di$hor = di$hor * 100
else if(0 < di$hor%%10 && di$hor%%10 < 5)
di$hor = di$hor * 10
}
}
dataset$hor[i] = di$hor%/%100
dataset$min[i] = di$hor%%100
}
# Passar a data i extreure els factors importanta
dataset$data = as.Date(dataset$dat, '%d/%m/%Y')
dataset$diaSem = factor(weekdays(dataset$data))
dataset$mes = factor(months(dataset$data))
# Eliminar velocitats falses
dataset$C_VELOCITAT_VIA[dataset$C_VELOCITAT_VIA == 999] = NA
dataset$C_VELOCITAT_VIA[dataset$C_VELOCITAT_VIA == 99] = NA
dataset$C_VELOCITAT_VIA[dataset$C_VELOCITAT_VIA == 0] = NA
# Guardar fitxer corretgit
write.csv(dataset, file = ds.file)
} else {
# Carrregar fitxer corregit
dataset <- read.csv(ds.file)
dataset$data = as.Date(dataset$dat, '%d/%m/%Y')
}
# Vectors rellevants
nom.hor <- format(ISOdate(0,1,1, 0:23),"%Hh")
color.grupHor <- c(rep('darkblue', 6), rep('yellow', 8), rep('orange', 8), rep('darkblue', 2))
val.hor <- 0:24
# DataFrame de valor importants
d.hora.VM <- data.frame(victimes = tapply(dataset$F_VICTIMES, dataset$hor, mean)[val.hor],
morts = tapply(dataset$F_MORTS, dataset$hor, mean)[val.hor],
n = tapply(dataset$F_VICTIMES, dataset$hor, length)[val.hor],
nom = nom.hor,
color = color.grupHor)
# Plots de les dades
ggplot(data = d.hora.VM, aes(x = nom, y = n), col) +
geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.hora.VM$color, color = 'black') +
ggtitle("Histograma de numero d\'accidents") + ylab("Total") + xlab("Hora") +
coord_polar()
ggplot(data = d.hora.VM, aes(x = nom, y = victimes), col) +
geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.hora.VM$color, color = 'black') +
ggtitle("Victimes per accident") + ylab("Mitja") + xlab("Hora") +
coord_polar()
ggplot(data = d.hora.VM, aes(x = nom, y = morts), col) +
geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.hora.VM$color, color = 'black') +
ggtitle("Morts per accident") + ylab("Mitja") + xlab("Hora") +
coord_polar()
# Conjunt de dades amb l'hora i l'angle
d.hora.all <- data.frame(victimes = dataset$F_VICTIMES,
morts = dataset$F_MORTS,
hora = dataset$hor,
theta = dataset$hor * pi / 12)
# Resultats de la mitja circular
d.hora.V <- circular.mean(tapply(d.hora.all$victimes, d.hora.all$theta, mean))
d.hora.M <- circular.mean(tapply(d.hora.all$morts, d.hora.all$theta, mean))
d.hora.theta.V <- d.hora.V[1]
d.hora.var.V <- d.hora.V[2]
d.hora.theta.M <- d.hora.M[1]
d.hora.var.M <- d.hora.M[2]
# Preparació pel test permutacional
n.rep <- 10000
n.set <- nrow(d.hora.all)
d.hora.rep <- tapply(d.hora.all$hora, d.hora.all$theta, length)
p.hora.theta.V <- numeric(n.rep)
p.hora.var.V <- numeric(n.rep)
p.hora.theta.M <- numeric(n.rep)
p.hora.var.M <- numeric(n.rep)
# Fer les permutacions
for(i in 1:n.rep) {
perm <- sample(n.set)
p.hora.V <- circular.mean(resample.means(d.hora.all$victimes, d.hora.rep ,perm))
p.hora.M <- circular.mean(resample.means(d.hora.all$morts, d.hora.rep, perm))
p.hora.theta.V[i] <- p.hora.V[1]
p.hora.var.V[i] <- p.hora.V[2]
p.hora.theta.M[i] <- p.hora.M[1]
p.hora.var.M[i] <- p.hora.M[2]
}
# Plot dels resultats (Victimes)
plot.hora.limit.V <- max(c(p.hora.var.V, d.hora.var.V * 1.1))
hist(p.hora.var.V, xlim = c(0, plot.hora.limit.V), main = '"Variancia" de l\'hora per victimes')
p.value.hora.V <- sum(p.hora.var.V >= d.hora.var.V) / n.rep
lines(rep(d.hora.var.V, 2), c(0, n.rep), col = 'red')
# Plot dels resultats (Morts)
plot.hora.limit.M <- max(c(p.hora.var.M, d.hora.var.M * 1.1))
hist(p.hora.var.M, xlim = c(0, plot.hora.limit.M), main = '"Variancia" de l\'hora per morts')
p.value.hora.M <- sum(p.hora.var.M >= d.hora.var.M) / n.rep
lines(rep(d.hora.var.M, 2), c(0, n.rep), col = 'red')
# Mostra les dades
{
cat('VÃctimes p-value:', p.value.hora.V, '\n')
cat('Morts p-value:', p.value.hora.M, '\n')
cat('VÃctimes mitja circular:', d.hora.theta.V * 12 / pi, ', varià ncia circular:', d.hora.var.V, '\n')
cat('Morts mitja circular:', d.hora.theta.M * 12 / pi, ', varià ncia circular:', d.hora.var.M, '\n')
}
## VÃctimes p-value: 0
## Morts p-value: 0
## VÃctimes mitja circular: 2.015411 , varià ncia circular: 0.001635155
## Morts mitja circular: 2.603732 , varià ncia circular: 0.008263661
# Scatter plots de les permutacions (Victimes)
plot.hora.limitX.V <- c(min(p.hora.var.V * sin(p.hora.theta.V)), d.hora.var.V * sin(d.hora.theta.V) * 1.1)
plot.hora.limitY.V <- c(min(p.hora.var.V * cos(p.hora.theta.V)), d.hora.var.V * cos(d.hora.theta.V) * 1.1)
plot(p.hora.var.V * sin(p.hora.theta.V), p.hora.var.V * cos(p.hora.theta.V),
asp = 1, xlim = plot.hora.limitX.V, ylim = plot.hora.limitY.V, col = 'blue', lwd = .1, cex = .3,
xlab = '', ylab = '', main = 'scatter plot de les permutacions per victimes')
points(d.hora.var.V * sin(d.hora.theta.V), d.hora.var.V * cos(d.hora.theta.V),
col = 'red', cex = .5)
# Scatter plots de les permutacions (Morts)
plot.hora.limitX.M <- c(min(p.hora.var.M * sin(p.hora.theta.M)), d.hora.var.M * sin(d.hora.theta.M) * 1.1)
plot.hora.limitY.M <- c(min(p.hora.var.M * cos(p.hora.theta.M)), d.hora.var.M * cos(d.hora.theta.M) * 1.1)
plot(p.hora.var.M * sin(p.hora.theta.M), p.hora.var.M * cos(p.hora.theta.M),
asp = 1, xlim = plot.hora.limitX.M, ylim = plot.hora.limitY.M, col = 'blue', lwd = .1, cex = .3,
xlab = '', ylab = '', main = 'scatter plot de les permutacions per morts')
points(d.hora.var.M * sin(d.hora.theta.M), d.hora.var.M * cos(d.hora.theta.M),
col = 'red', cex = .5)
# Vectors rellevants
nom.diaSem <- weekdays(as.Date('24/05/2021', '%d/%m/%Y')+0:6)
nom.tipusDia <- c(rep('laborable', 5), rep('fi de setmana', 2))
color.feiner <- c(rep('red', 5), rep('blue', 2))
# DataFrame de les dades importants
d.dia.VM <- data.frame(victimes = tapply(dataset$F_VICTIMES, dataset$diaSem, mean)[nom.diaSem],
morts = tapply(dataset$F_MORTS, dataset$diaSem, mean)[nom.diaSem],
n = tapply(dataset$F_VICTIMES, dataset$diaSem, length)[nom.diaSem],
nom = factor(nom.diaSem, levels = nom.diaSem),
color = color.feiner,
tipus = factor(nom.tipusDia, levels = c('laborable', 'fi de setmana')))
# Plots de les dades
ggplot(data = d.dia.VM, aes(y = n, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = d.dia.VM$color, color = 'black') +
ggtitle("Histograma de numero d\'accidents") + ylab("Total") + xlab("Dia de la setmana")
ggplot(data = d.dia.VM, aes(y = victimes, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = d.dia.VM$color, color = 'black') +
ggtitle("Victimes per accident") + ylab("Mitja") + xlab("Dia de la setmana")
ggplot(data = d.dia.VM, aes(y = morts, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = d.dia.VM$color, color = 'black') +
ggtitle("Morts per accident") + ylab("Mitja") + xlab("Dia de la setmana")
# agafar dades respecte el dia de la setmana
d.dia.all <- data.frame(victimes = dataset$F_VICTIMES,
morts = dataset$F_MORTS,
dia = factor(dataset$diaSem, levels = nom.diaSem),
tipus = factor(dataset$diaSem, levels = nom.diaSem))
levels(d.dia.all$tipus) <- nom.tipusDia
# Generem els models lineals per probar
d.dia.fit.lab.V <- lm(d.dia.all$victimes[d.dia.all$tipus == 'laborable']~
factor(d.dia.all$dia[d.dia.all$tipus == 'laborable']))
d.dia.fit.lab.M <- lm(d.dia.all$morts[d.dia.all$tipus == 'laborable']~
factor(d.dia.all$dia[d.dia.all$tipus == 'laborable']))
d.dia.fit.dia6.V <- lm(d.dia.all$victimes[d.dia.all$dia != nom.diaSem[7]]~
factor(d.dia.all$tipus[d.dia.all$dia != nom.diaSem[7]]))
d.dia.fit.dia6.M <- lm(d.dia.all$morts[d.dia.all$dia != nom.diaSem[7]]~
factor(d.dia.all$tipus[d.dia.all$dia != nom.diaSem[7]]))
d.dia.fit.fds.V <- lm(d.dia.all$victimes[d.dia.all$tipus == 'fi de setmana']~
factor(d.dia.all$dia[d.dia.all$tipus == 'fi de setmana']))
d.dia.fit.fds.M <- lm(d.dia.all$morts[d.dia.all$tipus == 'fi de setmana']~
factor(d.dia.all$dia[d.dia.all$tipus == 'fi de setmana']))
# Comparació amb test anova (aov)
{
cat('p-value per victimes comparant dies laborables:',
summary(aov(d.dia.fit.lab.V))[[1]]$`Pr(>F)`[1], '\n')
cat('p-value per morts comparant dies laborables:',
summary(aov(d.dia.fit.lab.M))[[1]]$`Pr(>F)`[1], '\n')
cat('Tots els dies laborables són iguals\n\n')
cat('p-value per victimes comparant dies laborables i dissabte:',
summary(aov(d.dia.fit.dia6.V))[[1]]$`Pr(>F)`[1], '\n')
cat('p-value per morts comparant dies laborables i dissabte:',
summary(aov(d.dia.fit.dia6.M))[[1]]$`Pr(>F)`[1], '\n')
cat('A dissabte el numero per accident augmenta (respecte els laborables)\n\n')
cat('p-value per victimes comparant dissabte i diumenge:',
summary(aov(d.dia.fit.fds.V))[[1]]$`Pr(>F)`[1], '\n')
cat('p-value per morts comparant dies dissabte i diumenge:',
summary(aov(d.dia.fit.fds.M))[[1]]$`Pr(>F)`[1], '\n')
cat('A diumenge el numero de victimes accident augmenta signnificativament pero el de morts no (Respecte dissabte)\n\n')
}
## p-value per victimes comparant dies laborables: 0.4398894
## p-value per morts comparant dies laborables: 0.961798
## Tots els dies laborables són iguals
##
## p-value per victimes comparant dies laborables i dissabte: 2.363101e-23
## p-value per morts comparant dies laborables i dissabte: 0.00154988
## A dissabte el numero per accident augmenta (respecte els laborables)
##
## p-value per victimes comparant dissabte i diumenge: 0.002842418
## p-value per morts comparant dies dissabte i diumenge: 0.2385609
## A diumenge el numero de victimes accident augmenta signnificativament pero el de morts no (Respecte dissabte)
# Vectors rellevants
nom.mes <- format(ISOdate(0,1:12,1), '%B')
color.estacio <- c(rep('cyan', 2), rep('green', 3), rep('yellow', 3), rep('orange', 3), 'cyan')
# DataFrame de les dades importants
d.mes.VM <- data.frame(victimes = tapply(dataset$F_VICTIMES, dataset$mes, mean)[nom.mes],
morts = tapply(dataset$F_MORTS, dataset$mes, mean)[nom.mes],
n = tapply(dataset$F_VICTIMES, dataset$mes, length)[nom.mes],
nom = factor(nom.mes, levels = nom.mes),
color = color.estacio)
# Plots de les dades
ggplot(data = d.mes.VM, aes(x = nom, y = n), col) +
geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.mes.VM$color, color = 'black') +
ggtitle("Histograma de numero d\'accidents") + ylab("Total") + xlab("Hora") +
coord_polar()
ggplot(data = d.mes.VM, aes(x = nom, y = victimes), col) +
geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.mes.VM$color, color = 'black') +
ggtitle("Victimes per accident") + ylab("Mitja") + xlab("Hora") +
coord_polar()
ggplot(data = d.mes.VM, aes(x = nom, y = morts), col) +
geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.mes.VM$color, color = 'black') +
ggtitle("Morts per accident") + ylab("Mitja") + xlab("Hora") +
coord_polar()
# Conjunt de dades dels mesos
d.mes.all <- data.frame(victimes = dataset$F_VICTIMES,
morts = dataset$F_MORTS,
mes = dataset$mes,
theta = as.numeric(format(dataset$data, "%m")) * pi / 6)
# Calcul de la mitjana circular
d.mes.V <- circular.mean(tapply(d.mes.all$victimes, d.mes.all$theta, mean))
d.mes.M <- circular.mean(tapply(d.mes.all$morts, d.mes.all$theta, mean))
d.mes.theta.V <- d.mes.V[1]
d.mes.var.V <- d.mes.V[2]
d.mes.theta.M <- d.mes.M[1]
d.mes.var.M <- d.mes.M[2]
#Preparació pel test permutacional
n.rep <- 10000
n.set <- nrow(d.mes.all)
d.mes.rep <- tapply(d.mes.all$mes, d.mes.all$theta, length)
p.mes.theta.V <- numeric(n.rep)
p.mes.var.V <- numeric(n.rep)
p.mes.theta.M <- numeric(n.rep)
p.mes.var.M <- numeric(n.rep)
# Fer les permutacions
for(i in 1:n.rep) {
perm <- sample(n.set)
p.mes.V <- circular.mean(resample.means(d.mes.all$victimes, d.mes.rep ,perm))
p.mes.M <- circular.mean(resample.means(d.mes.all$morts, d.mes.rep, perm))
p.mes.theta.V[i] <- p.mes.V[1]
p.mes.var.V[i] <- p.mes.V[2]
p.mes.theta.M[i] <- p.mes.M[1]
p.mes.var.M[i] <- p.mes.M[2]
}
# Plot variancia (Victimes)
plot.mes.limit.V <- max(c(p.mes.var.V, d.mes.var.V * 1.1))
hist(p.mes.var.V, xlim = c(0, plot.mes.limit.V), main = '"Variancia" de l\'mes per victimes')
p.value.mes.V <- sum(p.mes.var.V >= d.mes.var.V) / n.rep
lines(rep(d.mes.var.V, 2), c(0, n.rep), col = 'red')
# Plot variancia (Morts)
plot.mes.limit.M <- max(c(p.mes.var.M, d.mes.var.M * 1.1))
hist(p.mes.var.M, xlim = c(0, plot.mes.limit.M), main = '"Variancia" de l\'mes per morts')
p.value.mes.M <- sum(p.mes.var.M >= d.mes.var.M) / n.rep
lines(rep(d.mes.var.M, 2), c(0, n.rep), col = 'red')
# Mostra les dades
{
cat('VÃctimes p-value:', p.value.mes.V, '\n')
cat('Morts p-value:', p.value.mes.M, '\n')
cat('VÃctimes mitja circular:', d.mes.theta.V * 6 / pi, ', varià ncia circular:', d.mes.var.V, '\n')
cat('Morts mitja circular:', d.mes.theta.M * 6 / pi, ', varià ncia circular:', d.mes.var.M, '\n')
}
## VÃctimes p-value: 0.0641
## Morts p-value: 0.4205
## VÃctimes mitja circular: 8.007152 , varià ncia circular: 0.0008382164
## Morts mitja circular: 9.627163 , varià ncia circular: 0.001744607
# Scatter plot de les permutacions (Victimes)
plot(p.mes.var.V * sin(p.mes.theta.V), p.mes.var.V * cos(p.mes.theta.V),
asp = 1, col = 'blue', lwd = .1, cex = .3,
xlab = '', ylab = '', main = 'scatter plot de les permutacions per victimes')
points(d.mes.var.V * sin(d.mes.theta.V), d.mes.var.V * cos(d.mes.theta.V),
col = 'red', cex = .5)
# Scatter plot de les permutacions (Morts)
plot(p.mes.var.M * sin(p.mes.theta.M), p.mes.var.M * cos(p.mes.theta.M),
asp = 1, col = 'blue', lwd = .1, cex = .3,
xlab = '', ylab = '', main = 'scatter plot de les permutacions per morts')
points(d.mes.var.M * sin(d.mes.theta.M), d.mes.var.M * cos(d.mes.theta.M),
col = 'red', cex = .5)
# Vectors rellevants
nom.via <- names(sort(tapply(dataset$F_VICTIMES, dataset$D_TIPUS_VIA, length), decreasing = TRUE))
nom.via <- nom.via[nom.via != 'Altres']
nom.viaComp <- c('Via urbana', 'Carretera\nconvencional',
'Autovia', 'Autopista',
'Camà rural\nPista forestal')
# DataFrame de les dades importants
d.via.VM <- data.frame(victimes = tapply(dataset$F_VICTIMES, dataset$D_TIPUS_VIA, mean)[nom.via],
morts = tapply(dataset$F_MORTS, dataset$D_TIPUS_VIA, mean)[nom.via],
n = tapply(dataset$F_VICTIMES, dataset$D_TIPUS_VIA, length)[nom.via],
nom = factor(nom.viaComp, levels = nom.viaComp))
# Plots de les dades
ggplot(data = d.via.VM, aes(y = n, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'white', color = 'black') +
ggtitle("Histograma de numero d\'accidents") + ylab("Total") + xlab("Tipus de via") +
scale_y_log10()
ggplot(data = d.via.VM, aes(y = victimes, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'white', color = 'black') +
ggtitle("Victimes per accident") + ylab("Mitja") + xlab("Tipus de via")
ggplot(data = d.via.VM, aes(y = morts, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'white', color = 'black') +
ggtitle("Morts per accident") + ylab("Mitja") + xlab("Tipus de via")
# Conjubt de dades del tipus de via
d.via.all <- data.frame(victimes = dataset$F_VICTIMES,
morts = dataset$F_MORTS,
via = dataset$D_TIPUS_VIA)
# Preparacion pel test bootsrap no parametric
n.rep = 10000
d.via.c.V <- tapply(d.via.all$victimes, d.via.all$via, c)
d.via.c.M <- tapply(d.via.all$morts, d.via.all$via, c)
p.via.mean.V <- list()
p.via.mean.M <- list()
for(via in nom.via) {
p.via.mean.V[[via]] = numeric(n.rep)
p.via.mean.M[[via]] = numeric(n.rep)
}
# Fer el test
for(i in 1:n.rep) {
t.via.mean.V <- lapply(lapply(d.via.c.V, sample, replace = T), mean)
t.via.mean.M <- lapply(lapply(d.via.c.M, sample, replace = T), mean)
for(via in nom.via) {
p.via.mean.V[[via]][i] = t.via.mean.V[[via]]
p.via.mean.M[[via]][i] = t.via.mean.M[[via]]
}
}
# Calcular resultats
p.via.VM <- data.frame(victimes = c(unlist(lapply(p.via.mean.V, mean)))[nom.via],
morts = c(unlist(lapply(p.via.mean.M, mean)))[nom.via],
nom = factor(nom.viaComp, levels = nom.viaComp))
for(i in 1:length(nom.via)) {
via = nom.via[i]
p.via.VM$IC.max.V[i] = 2 * p.via.VM$victimes[i] - quantile(p.via.mean.V[[via]], 0.025)
p.via.VM$IC.min.V[i] = 2 * p.via.VM$victimes[i] - quantile(p.via.mean.V[[via]], 0.975)
p.via.VM$IC.max.M[i] = 2 * p.via.VM$morts[i] - quantile(p.via.mean.M[[via]], 0.025)
p.via.VM$IC.min.M[i] = 2 * p.via.VM$morts[i] - quantile(p.via.mean.M[[via]], 0.975)
}
# Mostrar les dades
for(i in 1:length(nom.via)) {
via = nom.via[i]
cat('Via:', via, '\n')
cat(' victimes:', p.via.VM$IC.min.V[i], p.via.VM$victimes[i], p.via.VM$IC.max.V[i], '\n')
cat(' morts:', p.via.VM$IC.min.M[i], p.via.VM$morts[i], p.via.VM$IC.max.M[i], '\n')
}
## Via: Via urbana( inclou carrer i carrer residencial)
## victimes: 1.284712 1.300773 1.31649
## morts: 0.06754585 0.07334764 0.0790048
## Via: Carretera convencional
## victimes: 1.797238 1.834285 1.869229
## morts: 0.2011732 0.2132244 0.2249316
## Via: Autovia
## victimes: 1.55424 1.726254 1.88108
## morts: 0.1299619 0.1688771 0.2057195
## Via: Autopista
## victimes: 1.749133 1.974114 2.174472
## morts: 0.1592462 0.2266819 0.2904679
## Via: Camà rural/pista forestal
## victimes: 1.154804 1.304352 1.4314
## morts: 0.0768539 0.1412638 0.1974213
# Plot dels resultats (Victimes)
ggplot(data = p.via.VM, aes(y = victimes, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'white', color = 'black') +
geom_errorbar(aes(ymin = IC.min.V, ymax = IC.max.V), width=.2, position=position_dodge(.9)) +
ggtitle("Victimes per accident") + ylab("Mitja") + xlab("Tipus de via")
# Plot dels resultats (Morts)
ggplot(data = p.via.VM, aes(y = morts, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'white', color = 'black') +
geom_errorbar(aes(ymin = IC.min.M, ymax = IC.max.M), width=.2, position=position_dodge(.9)) +
ggtitle("Morts per accident") + ylab("Mitja") + xlab("Tipus de via")
# Vectors rellevants
nom.carril <- names(sort(tapply(dataset$F_VICTIMES, dataset$D_CARRIL_ESPECIAL, length), decreasing = TRUE))
nom.carril <- nom.carril[nom.carril != 'Sense Especificar']
nom.carrilComp <- gsub("/", "\n", gsub(" ", "\n", nom.carril))
# DataFrame de les dades importants
d.carril.VM <- data.frame(victimes = tapply(dataset$F_VICTIMES, dataset$D_CARRIL_ESPECIAL, mean)[nom.carril],
morts = tapply(dataset$F_MORTS, dataset$D_CARRIL_ESPECIAL, mean)[nom.carril],
n = tapply(dataset$F_VICTIMES, dataset$D_CARRIL_ESPECIAL, length)[nom.carril],
nom = factor(nom.carrilComp, levels = nom.carrilComp))
# Plots de les dades
ggplot(data = d.carril.VM, aes(y = n, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'darkgreen', color = 'black') +
ggtitle("Histograma de numero d\'accidents") + ylab("Total") + xlab("Tipus de carril") +
scale_y_log10()
ggplot(data = d.carril.VM, aes(y = victimes, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'darkgreen', color = 'black') +
ggtitle("Victimes per accident") + ylab("Mitja") + xlab("Tipus de carril")
ggplot(data = d.carril.VM, aes(y = morts, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'darkgreen', color = 'black') +
ggtitle("Morts per accident") + ylab("Mitja") + xlab("Tipus de carril")
# Conjunt de dades sobre el tipus de carril
d.carril.all <- data.frame(victimes = dataset$F_VICTIMES,
morts = dataset$F_MORTS,
carril = dataset$D_CARRIL_ESPECIAL)
# Preparacion pel test bootsrap no parametric
n.rep = 10000
d.carril.c.V <- tapply(d.carril.all$victimes, d.carril.all$carril, c)
d.carril.c.M <- tapply(d.carril.all$morts, d.carril.all$carril, c)
p.carril.mean.V <- list()
p.carril.mean.M <- list()
for(carril in nom.carril) {
p.carril.mean.V[[carril]] = numeric(n.rep)
p.carril.mean.M[[carril]] = numeric(n.rep)
}
# Fer el test
for(i in 1:n.rep) {
t.carril.mean.V <- lapply(lapply(d.carril.c.V, sample, replace = T), mean)
t.carril.mean.M <- lapply(lapply(d.carril.c.M, sample, replace = T), mean)
for(carril in nom.carril) {
p.carril.mean.V[[carril]][i] = t.carril.mean.V[[carril]]
p.carril.mean.M[[carril]][i] = t.carril.mean.M[[carril]]
}
}
# Calcular rresultats
p.carril.VM <- data.frame(victimes = c(unlist(lapply(p.carril.mean.V, mean)))[nom.carril],
morts = c(unlist(lapply(p.carril.mean.M, mean)))[nom.carril],
nom = factor(nom.carrilComp, levels = nom.carrilComp))
for(i in 1:length(nom.carril)) {
carril = nom.carril[i]
p.carril.VM$IC.max.V[i] = 2 * d.carril.VM$victimes[i] - quantile(p.carril.mean.V[[carril]], 0.025)
p.carril.VM$IC.min.V[i] = max(2 * d.carril.VM$victimes[i] - quantile(p.carril.mean.V[[carril]], 0.975), 0)
p.carril.VM$IC.max.M[i] = 2 * d.carril.VM$morts[i] - quantile(p.carril.mean.M[[carril]], 0.025)
p.carril.VM$IC.min.M[i] = max(2 * d.carril.VM$morts[i] - quantile(p.carril.mean.M[[carril]], 0.975), 0)
}
# Mostrar les dades
for(i in 1:length(nom.carril)) {
carril = nom.carril[i]
cat('Carril:', carril, '\n')
cat(' victimes:', p.carril.VM$IC.min.V[i], p.carril.VM$victimes[i], p.carril.VM$IC.max.V[i], '\n')
cat(' morts:', p.carril.VM$IC.min.M[i], p.carril.VM$morts[i], p.carril.VM$IC.max.M[i], '\n')
}
## Carril: No n'hi ha
## victimes: 1.533244 1.553533 1.572497
## morts: 0.1390521 0.1458711 0.1523364
## Carril: Carril bus
## victimes: 1.211712 1.375533 1.5
## morts: 0.05855856 0.1084257 0.1531532
## Carril: Carril bici
## victimes: 1.166667 1.276779 1.373016
## morts: 0.02380952 0.07957063 0.1269841
## Carril: Altres
## victimes: 1.232 1.528206 1.744
## morts: 0.088 0.1525992 0.208
## Carril: Carril acceleració
## victimes: 1.475806 1.757742 2.008065
## morts: 0.1048387 0.1689984 0.233871
## Carril: Carril central
## victimes: 1.559322 1.866116 2.118644
## morts: 0.08474576 0.1697237 0.2457627
## Carril: Carril d'alentiment
## victimes: 1.5 1.823399 2.108108
## morts: 0.04054054 0.1084959 0.1756757
## Carril: Carril lent
## victimes: 1.8 2.216045 2.583333
## morts: 0.1166667 0.2344183 0.3333333
## Carril: Carril avançament
## victimes: 1.861111 2.447892 2.972222
## morts: 0.2777778 0.4729306 0.6666667
## Carril: Habilitació voral/carril addicional
## victimes: 1.068966 1.449455 1.758621
## morts: 0.06896552 0.2411724 0.3793103
## Carril: Carril reversible
## victimes: 1.235294 2.182371 2.882353
## morts: 0 0.05855294 0.1176471
## Carril: Carril habilitat en sentit contrari habitual
## victimes: 1 1.751908 2.333333
## morts: 0 0.1646333 0.3333333
#Plot dels resultats (Victimes)
ggplot(data = p.carril.VM, aes(y = victimes, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'darkgreen', color = 'black') +
geom_errorbar(aes(ymin = IC.min.V, ymax = IC.max.V), width=.2, position=position_dodge(.9)) +
ggtitle("Victimes per accident") + ylab("Mitja") + xlab("Tipus de carril")
#Plot dels resultats (Morts)
ggplot(data = p.carril.VM, aes(y = morts, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'darkgreen', color = 'black') +
geom_errorbar(aes(ymin = IC.min.M, ymax = IC.max.M), width=.2, position=position_dodge(.9)) +
ggtitle("Morts per accident") + ylab("Mitja") + xlab("Tipus de carril")
# Vectors rellevats
val.vel <- levels(factor(dataset$C_VELOCITAT_VIA))
val.vel10 <- seq(10, 120, 10)
nom.vel <- paste(val.vel, 'km/h')
# Conjunt de dades sobre la velocitat
d.vel.all <- data.frame(victimes = dataset$F_VICTIMES[!is.na(dataset$C_VELOCITAT_VIA)],
morts = dataset$F_MORTS[!is.na(dataset$C_VELOCITAT_VIA)],
velocitat = dataset$C_VELOCITAT_VIA[!is.na(dataset$C_VELOCITAT_VIA)],
vel10 = factor(dataset$C_VELOCITAT_VIA[!is.na(dataset$C_VELOCITAT_VIA)] %/% 10 * 10, levels = val.vel10))
d.vel.all$MpV100 <- d.vel.all$morts / d.vel.all$victimes * 100
# DtaFrame de les dades importants
d.vel.VM <- data.frame(victimes = tapply(d.vel.all$victimes, d.vel.all$velocitat, mean)[val.vel],
morts = tapply(d.vel.all$morts, d.vel.all$velocitat, mean)[val.vel],
MpV100 = tapply(d.vel.all$MpV100, d.vel.all$velocitat, mean)[val.vel],
n = tapply(d.vel.all$victimes, d.vel.all$velocitat, length)[val.vel],
nom = factor(nom.vel, levels = nom.vel))
#Plot de les dades
ggplot(data = d.vel.VM, aes(y = n, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'cyan', color = 'black') +
ggtitle("Histograma de numero d\'accidents") + ylab("Total") + xlab("Velocitat mà xima") +
scale_y_continuous(trans = 'log10')
ggplot(data = d.vel.VM, aes(y = victimes, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'cyan', color = 'black') +
ggtitle("Victimes per accident") + ylab("Mitja") + xlab("Velocitat mà xima")
ggplot(data = d.vel.VM, aes(y = morts, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'cyan', color = 'black') +
ggtitle("Morts per accident") + ylab("Mitja") + xlab("Velocitat mà xima")
ggplot(data = d.vel.VM, aes(y = MpV100, x = nom)) +
geom_bar(stat = "identity", position = "dodge", fill = 'cyan', color = 'black') +
ggtitle("Percentatge de mortalitat de les victimes") + ylab("Percentatge de vÃctimes mortes") + xlab("Velocitat mà xima")
# Calcul de les correlacion originals
d.vel.pearson.V = cor(d.vel.all$victimes, d.vel.all$velocitat, method = 'pearson')
d.vel.spearman.V = cor(d.vel.all$victimes, d.vel.all$velocitat, method = 'spearman')
d.vel.pearson.M = cor(d.vel.all$morts, d.vel.all$velocitat, method = 'pearson')
d.vel.spearman.M = cor(d.vel.all$morts, d.vel.all$velocitat, method = 'spearman')
d.vel.pearson.MpV100 = cor(d.vel.all$MpV100, d.vel.all$velocitat, method = 'pearson')
d.vel.spearman.MpV100 = cor(d.vel.all$MpV100, d.vel.all$velocitat, method = 'spearman')
#Preparació de test permutacional
n.rep <- 3000
p.vel.pearson.V <- numeric(n.rep)
p.vel.spearman.V <- numeric(n.rep)
p.vel.pearson.M <- numeric(n.rep)
p.vel.spearman.M <-numeric(n.rep)
p.vel.pearson.MpV100 <- numeric(n.rep)
p.vel.spearman.MpV100 <-numeric(n.rep)
# Fer permutacions
for(i in 1:n.rep) {
p.vel <- sample(d.vel.all$velocitat)
p.vel.pearson.V[i] = cor(d.vel.all$victimes, p.vel, method = 'pearson')
p.vel.spearman.V[i] = cor(d.vel.all$victimes, p.vel, method = 'spearman')
p.vel.pearson.M[i] = cor(d.vel.all$morts, p.vel, method = 'pearson')
p.vel.spearman.M[i] = cor(d.vel.all$morts, p.vel, method = 'spearman')
p.vel.pearson.MpV100[i] = cor(d.vel.all$MpV100, p.vel, method = 'pearson')
p.vel.spearman.MpV100[i] = cor(d.vel.all$MpV100, p.vel, method = 'spearman')
}
# Plots de Victimes
plot.vel.limit.pearson.V <- c(min(p.vel.pearson.V) * 1.1,
max(c(p.vel.pearson.V, d.vel.pearson.V) * 1.1))
hist(p.vel.pearson.V, xlim = plot.vel.limit.pearson.V, main = 'Correlació entre velocitat i vÃctimes (pearson)')
p.value.vel.pearson.V <- sum(p.vel.pearson.V >= d.vel.pearson.V) / n.rep
lines(rep(d.vel.pearson.V, 2), c(0, n.rep), col = 'red')
plot.vel.limit.spearman.V <- c(min(p.vel.spearman.V) * 1.1,
max(c(p.vel.spearman.V, d.vel.spearman.V) * 1.1))
hist(p.vel.spearman.V, xlim = plot.vel.limit.spearman.V, main = 'Correlació entre velocitat i vÃctimes (spearman)')
p.value.vel.spearman.V <- sum(p.vel.spearman.V >= d.vel.spearman.V) / n.rep
lines(rep(d.vel.spearman.V, 2), c(0, n.rep), col = 'red')
# Plots de Morts
plot.vel.limit.pearson.M <- c(min(p.vel.pearson.M) * 1.1,
max(c(p.vel.pearson.M, d.vel.pearson.M) * 1.1))
hist(p.vel.pearson.M, xlim = plot.vel.limit.pearson.M, main = 'Correlació entre velocitat i morts (pearson)')
p.value.vel.pearson.M <- sum(p.vel.pearson.M >= d.vel.pearson.M) / n.rep
lines(rep(d.vel.pearson.M, 2), c(0, n.rep), col = 'red')
plot.vel.limit.spearman.M <- c(min(p.vel.spearman.M) * 1.1,
max(c(p.vel.spearman.M, d.vel.spearman.M) * 1.1))
hist(p.vel.spearman.M, xlim = plot.vel.limit.spearman.M, main = 'Correlació entre velocitat i morts (spearman)')
p.value.vel.spearman.M <- sum(p.vel.spearman.M >= d.vel.spearman.M) / n.rep
lines(rep(d.vel.spearman.M, 2), c(0, n.rep), col = 'red')
# Plots de Mortalitat
plot.vel.limit.pearson.MpV100 <- c(min(p.vel.pearson.MpV100) * 1.1,
max(c(p.vel.pearson.MpV100, d.vel.pearson.MpV100) * 1.1))
hist(p.vel.pearson.MpV100, xlim = plot.vel.limit.pearson.MpV100, main = 'Correlació entre velocitat i percentatge de vÃctimes mortes (pearson)')
p.value.vel.pearson.MpV100 <- sum(p.vel.pearson.MpV100 >= d.vel.pearson.MpV100) / n.rep
lines(rep(d.vel.pearson.MpV100, 2), c(0, n.rep), col = 'red')
plot.vel.limit.spearman.MpV100 <- c(min(p.vel.spearman.MpV100) * 1.1,
max(c(p.vel.spearman.MpV100, d.vel.spearman.MpV100) * 1.1))
hist(p.vel.spearman.MpV100, xlim = plot.vel.limit.spearman.MpV100, main = 'Correlació entre velocitat i percentatge de vÃctimes mortes (spearman)')
p.value.vel.spearman.MpV100 <- sum(p.vel.spearman.MpV100 >= d.vel.spearman.MpV100) / n.rep
lines(rep(d.vel.spearman.MpV100, 2), c(0, n.rep), col = 'red')
# Mostrar les dades
{
cat('Victimes:\n')
cat(' Pearson: ', d.vel.pearson.V, ',p-value:', p.value.vel.pearson.V, '\n')
cat(' Spearman: ', d.vel.spearman.V, ',p-value:', p.value.vel.spearman.V, '\n')
cat('Morts:\n')
cat(' Pearson: ', d.vel.pearson.M, ',p-value:', p.value.vel.pearson.M, '\n')
cat(' Spearman: ', d.vel.spearman.M, ',p-value:', p.value.vel.spearman.M, '\n')
cat('Mortalitat:\n')
cat(' Pearson: ', d.vel.pearson.MpV100, ',p-value:', p.value.vel.pearson.MpV100, '\n')
cat(' Spearman: ', d.vel.spearman.MpV100, ',p-value:', p.value.vel.spearman.MpV100, '\n')
}
## Victimes:
## Pearson: 0.04008627 ,p-value: 0
## Spearman: 0.01377029 ,p-value: 0.05666667
## Morts:
## Pearson: 0.04403149 ,p-value: 0
## Spearman: 0.0318193 ,p-value: 0
## Mortalitat:
## Pearson: 0.0351944 ,p-value: 0
## Spearman: 0.03048427 ,p-value: 0
# Calcular pendents
d.vel.fit.V <- lm(victimes~velocitat, data = d.vel.all)
d.vel.fit.M <- lm(morts~velocitat, data = d.vel.all)
d.vel.fit.MpV100 <- lm(MpV100~velocitat, data = d.vel.all)
d.vel.10.VM <- data.frame(victimes = tapply(d.vel.all$victimes, d.vel.all$vel10, mean),
morts = tapply(d.vel.all$morts, d.vel.all$vel10, mean),
MpV100 = tapply(d.vel.all$MpV100, d.vel.all$vel10, mean),
n = tapply(d.vel.all$victimes, d.vel.all$vel10, length),
vel10 = val.vel10)
# Plots de les pendents (Victimes)
ggplot(data = d.vel.10.VM, aes(y = victimes, x = vel10)) +
geom_bar(stat = "identity", position = "dodge", fill = 'cyan', color = 'black') +
ggtitle("VÃctimes per accident") + ylab("Mitja") + xlab("Velocitat mà xima") +
geom_abline(intercept = summary(d.vel.fit.V)$coefficient[1],
slope = summary(d.vel.fit.V)$coefficient[2], color = 'red')
# Plots de les pendents (Morts)
ggplot(data = d.vel.10.VM, aes(y = morts, x = vel10)) +
geom_bar(stat = "identity", position = "dodge", fill = 'cyan', color = 'black') +
ggtitle("Morts per accident") + ylab("Mitja") + xlab("Velocitat mà xima") +
geom_abline(intercept = summary(d.vel.fit.M)$coefficient[1],
slope = summary(d.vel.fit.M)$coefficient[2], color = 'red')
# Plots de les pendents (Mortalitat)
ggplot(data = d.vel.10.VM, aes(y = MpV100, x = vel10)) +
geom_bar(stat = "identity", position = "dodge", fill = 'cyan', color = 'black') +
ggtitle("Percentatge de mortalitat de les victimes") + ylab("Percentatge de vÃctimes mortes") + xlab("Velocitat mà xima") +
geom_abline(intercept = summary(d.vel.fit.MpV100)$coefficient[1],
slope = summary(d.vel.fit.MpV100)$coefficient[2], color = 'red')
# Calcular intervals de confiança per les pendents
d.vel.IC.V <- summary(d.vel.fit.V)$coefficient[2] + c(-1.96, 1.96) * summary(d.vel.fit.V)$coefficient[4]
d.vel.IC.M <- summary(d.vel.fit.M)$coefficient[2] + c(-1.96, 1.96) * summary(d.vel.fit.M)$coefficient[4]
d.vel.IC.MpV100 <- summary(d.vel.fit.MpV100)$coefficient[2] + c(-1.96, 1.96) * summary(d.vel.fit.MpV100)$coefficient[4]
# Mostrar IdC
{
show(d.vel.IC.V)
show(d.vel.IC.M)
show(d.vel.IC.MpV100)
}
## [1] 0.001247525 0.003019904
## [1] 0.0004982629 0.0011039999
## [1] 0.02220343 0.06208294
# Conjunt de dades sobre la visibilitat
d.visib.all <- data.frame(victimes = dataset$F_VICTIMES, morts = dataset$F_MORTS,
boira = dataset$D_BOIRA,
nit = dataset$grupHor,
llum = dataset$D_LLUMINOSITAT)
levels(d.visib.all$boira) <- c("No n'hi ha", 'Si')
levels(d.visib.all$nit) <- c('Dia', 'Nit', 'Dia')
levels(d.visib.all$llum) <- c('Reduida', 'Bona', 'Reduida', 'Reduida', 'Bona', 'Cap', 'NA')
d.visib.mean.Total <- c(tapply(d.visib.all$victimes, d.visib.all$boira, length),
tapply(d.visib.all$victimes, d.visib.all$nit, length),
tapply(d.visib.all$victimes, d.visib.all$llum, length))
d.visib.mean.V <- c(tapply(d.visib.all$victimes, d.visib.all$boira, mean),
tapply(d.visib.all$victimes, d.visib.all$nit, mean),
tapply(d.visib.all$victimes, d.visib.all$llum, mean))
d.visib.mean.M <- c(tapply(d.visib.all$morts, d.visib.all$boira, mean),
tapply(d.visib.all$morts, d.visib.all$nit, mean),
tapply(d.visib.all$morts, d.visib.all$llum, mean))
d.visib.mean.Total <- d.visib.mean.Total[names(d.visib.mean.Total) != 'NA']
d.visib.mean.V <- d.visib.mean.V[names(d.visib.mean.V) != 'NA']
d.visib.mean.M <- d.visib.mean.M[names(d.visib.mean.M) != 'NA']
d.visib.type <- factor(c(rep('boira', sum(levels(d.visib.all$boira) != 'NA')),
rep('nit', sum(levels(d.visib.all$nit) != 'NA')),
rep('iluminació', sum(levels(d.visib.all$llum) != 'NA'))),
levels = c('boira', 'nit', 'iluminació'))
d.visib.class <- factor(names(d.visib.mean.V),
levels = c('Si', "No n'hi ha", 'Dia', 'Nit', 'Bona', 'Reduida', 'Cap'))
d.visib.opt <- levels(d.visib.class)
# DataFrame de les dades mési importants sobre la visibilitat
d.visib.VM <- data.frame(victimes = d.visib.mean.V,
morts = d.visib.mean.M,
n = d.visib.mean.Total,
class = d.visib.class,
type = d.visib.type)
# Plot de les dades (Victimes)
ggplot(d.visib.VM, aes(y = victimes, x = type, fill = class)) +
geom_bar(stat = "identity", position = "dodge") +
ggtitle("Victimes per accident") + ylab("Mitja") + xlab("Factor de visibilitat")
# Plot de les dades (Morts)
ggplot(d.visib.VM, aes(y = morts, x = type, fill = class)) +
geom_bar(stat = "identity", position = "dodge") +
ggtitle("Morts per accident") + ylab("Mitja") + xlab("Factor de visibilitat")
# Preparació per fer bootstrap
n.rep <- 5000
bst.visib.mean.V <- data.frame(i = 1:n.rep)
bst.visib.mean.M <- data.frame(i = 1:n.rep)
# Bucle de les imulacions de boostrap
for(i in 1:n.rep) {
for(j in 1:7) {
bst.visib.mean.V[[d.visib.opt[d.visib.class[j]]]][i] = mean(rpois(d.visib.VM$n[j], d.visib.VM$victimes[j]))
bst.visib.mean.M[[d.visib.opt[d.visib.class[j]]]][i] = mean(rpois(d.visib.VM$n[j], d.visib.VM$morts[j]))
}
}
bst.visib.VM <- data.frame(victimes = c(unlist(lapply(bst.visib.mean.V, mean)))[d.visib.opt],
IC.min.V = c(unlist(lapply(bst.visib.mean.V, quantile, 0.025)))[paste(d.visib.opt, '2.5%', sep = '.')],
IC.max.V = c(unlist(lapply(bst.visib.mean.V, quantile, 0.975)))[paste(d.visib.opt, '97.5%', sep = '.')],
morts = c(unlist(lapply(bst.visib.mean.M, mean)))[d.visib.opt],
IC.min.M = c(unlist(lapply(bst.visib.mean.M, quantile, 0.025)))[paste(d.visib.opt, '2.5%', sep = '.')],
IC.max.M = c(unlist(lapply(bst.visib.mean.M, quantile, 0.975)))[paste(d.visib.opt, '97.5%', sep = '.')],
class = factor(d.visib.opt, levels = d.visib.opt),
type = d.visib.type)
# Mostrar les dades
for(i in 1:length(d.visib.opt)) {
opt = d.visib.opt[i]
type = levels(bst.visib.VM$type)[bst.visib.VM$type[i]]
cat(type, ':', opt, '\n')
cat(' victimes:', bst.visib.VM$IC.min.V[i], bst.visib.VM$victimes[i], bst.visib.VM$IC.max.V[i], '\n')
cat(' morts:', bst.visib.VM$IC.min.M[i], bst.visib.VM$morts[i], bst.visib.VM$IC.max.M[i], '\n')
}
## boira : Si
## victimes: 1.411531 1.51659 1.622266
## morts: 0.07355865 0.1016231 0.1292247
## boira : No n'hi ha
## victimes: 1.532481 1.551805 1.571569
## morts: 0.1352698 0.141031 0.1468272
## nit : Dia
## victimes: 1.504028 1.524244 1.544174
## morts: 0.123079 0.1288659 0.1345897
## nit : Nit
## victimes: 1.68647 1.743212 1.802309
## morts: 0.2016975 0.2217 0.242636
## iluminació : Bona
## victimes: 1.466962 1.487332 1.508501
## morts: 0.1130063 0.1187302 0.124612
## iluminació : Reduida
## victimes: 1.660039 1.71447 1.770611
## morts: 0.1639185 0.1822759 0.2007759
## iluminació : Cap
## victimes: 1.911543 1.991864 2.07398
## morts: 0.2772109 0.3095497 0.3418367
# Plot dels resultats (Victimes)
ggplot(data = bst.visib.VM, aes(y = victimes, x = type, fill = class)) +
geom_bar(stat = "identity", position = "dodge") +
geom_errorbar(aes(ymin = IC.min.V, ymax = IC.max.V), width=.2, position=position_dodge(.9)) +
ggtitle("Victimes per accident") + ylab("Mitja") + xlab("Tipus de cFactor de visibilitatarril")
# Plot dels resultats (Morts)
ggplot(data = bst.visib.VM, aes(y = morts, x = type, fill = class)) +
geom_bar(stat = "identity", position = "dodge") +
geom_errorbar(aes(ymin = IC.min.M, ymax = IC.max.M), width=.2, position=position_dodge(.9)) +
ggtitle("Morts per accident") + ylab("Mitja") + xlab("Factor de visibilitat")
# Vectors rellevants
nom.superficie <- names(sort(tapply(dataset$F_VICTIMES, dataset$D_SUPERFICIE, length), decreasing = TRUE))
nom.superficie <- nom.superficie[nom.superficie != 'Sense especificar']
# DataFrame de les dades importants
d.sup.VM <- data.frame(victimes = tapply(dataset$F_VICTIMES, dataset$D_SUPERFICIE, mean)[nom.superficie],
morts = tapply(dataset$F_MORTS, dataset$D_SUPERFICIE, mean)[nom.superficie],
n = tapply(dataset$F_VICTIMES, dataset$D_SUPERFICIE, length)[nom.superficie],
nom = factor(nom.superficie, levels = nom.superficie),
color = 'red')
# Plot de les dades
ggplot(data = d.sup.VM, aes(x = nom, y = n), col) +
geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.sup.VM$color, color = 'black') +
ggtitle("Histograma de numero d\'accidents") + ylab("Total") + xlab("Terreny") +
scale_y_continuous(trans = 'log10')
ggplot(data = d.sup.VM, aes(x = nom, y = victimes), col) +
geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.sup.VM$color, color = 'black') +
ggtitle("Victimes per accident") + ylab("Mitja") + xlab("Terreny")
ggplot(data = d.sup.VM, aes(x = nom, y = morts), col) +
geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.sup.VM$color, color = 'black') +
ggtitle("Morts per accident") + ylab("Mitja") + xlab("Terreny")
# Declaracio de variables rellevants
d.sup.type.n <- 1:length(nom.superficie)
d.sup.n <- sum(d.sup.VM$n)
#Conjunt de dades sobre la superficie
d.sup.all <- data.frame(victimes = dataset$F_VICTIMES[dataset$D_SUPERFICIE != 'Sense especificar'],
morts = dataset$F_MORTS[dataset$D_SUPERFICIE != 'Sense especificar'],
superficie = dataset$D_SUPERFICIE[dataset$D_SUPERFICIE != 'Sense especificar'])
# Preparació per fer non parametric boostrap
n.rep <- 10000
p.sup.V <- matrix(nrow = length(nom.superficie), ncol = n.rep)
p.sup.M <- matrix(nrow = length(nom.superficie), ncol = n.rep)
# Bucle de les permutacions
for(i in 1:n.rep) {
perm <- sample(d.sup.n, replace = T)
p.sup.V[, i] <- resample.means(d.sup.all$victimes, d.sup.VM$n, perm)
p.sup.M[, i] <- resample.means(d.sup.all$morts, d.sup.VM$n, perm)
}
# Calcular els resultats
dp.sup.VM <- data.frame(victimes = d.sup.VM$victimes,
morts = d.sup.VM$morts,
nom = factor(nom.superficie, levels = nom.superficie))
for(i in d.sup.type.n) {
dp.sup.VM$IC.max.V[i] = 2 * mean(p.sup.V[i, ]) - quantile(p.sup.V[i, ], 0.025)
dp.sup.VM$IC.min.V[i] = max(2 * mean(p.sup.V[i, ]) - quantile(p.sup.V[i, ], 0.975), 1)
dp.sup.VM$IC.max.M[i] = 2 * mean(p.sup.M[i, ]) - quantile(p.sup.M[i, ], 0.025)
dp.sup.VM$IC.min.M[i] = max(2 * mean(p.sup.M[i, ]) - quantile(p.sup.M[i, ], 0.975), 0)
}
# Mostrar les dades
for(i in 1:length(nom.superficie)) {
terreny = nom.superficie[i]
cat('Terreny:', terreny, '\n')
cat(' victimes:', dp.sup.VM$victimes[i], ', interval de confiança', dp.sup.VM$IC.min.V[i], dp.sup.VM$IC.max.V[i], '\n')
cat(' morts:', dp.sup.VM$morts[i], ', interval de confiança', dp.sup.VM$IC.min.M[i], dp.sup.VM$IC.max.M[i], '\n')
}
## Terreny: Sec i net
## victimes: 1.553383 , interval de confiança 1.531714 1.569489
## morts: 0.1405058 , interval de confiança 0.1334798 0.1463447
## Terreny: Mullat
## victimes: 1.591631 , interval de confiança 1.453525 1.6339
## morts: 0.1385281 , interval de confiança 0.1086355 0.1692416
## Terreny: Relliscós
## victimes: 1.231481 , interval de confiança 1.29318 1.746883
## morts: 0.08333333 , interval de confiança 0.05817963 0.2063278
## Terreny: Inundat
## victimes: 1.193182 , interval de confiança 1.258914 1.770277
## morts: 0.1136364 , interval de confiança 0.04189091 0.2123455
## Terreny: Gelat
## victimes: 1.4 , interval de confiança 1 1.966
## morts: 0.2666667 , interval de confiança 0 0.28096
## Terreny: Nevat
## victimes: 1.444444 , interval de confiança 1 2.096
## morts: 0 , interval de confiança 0 0.2778667
# Plot dels resultats (Victimes)
ggplot(data = dp.sup.VM, aes(x = nom, y = victimes), col) +
geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.sup.VM$color, color = 'black') +
geom_errorbar(aes(ymin = IC.min.V, ymax = IC.max.V), width=.2, position=position_dodge(.9)) +
ggtitle("Victimes per accident") + ylab("Mitja") + xlab("Terreny")
# Plot dels resultats (Morts)
ggplot(data = dp.sup.VM, aes(x = nom, y = morts), col) +
geom_bar(width = 1, stat = "identity", position = "dodge", fill = d.sup.VM$color, color = 'black') +
geom_errorbar(aes(ymin = IC.min.M, ymax = IC.max.M), width=.2, position=position_dodge(.9)) +
ggtitle("Morts per accident") + ylab("Mitja") + xlab("Terreny")